{
	"model_name_or_path": "facebook/llama-7b",
	"dataset_name_or_path": "./data",
	"output_dir": "./checkpoints/llama_sft_ckpts",
	"per_device_train_batch_size": 1,
	"gradient_accumulation_steps": 4,
	"per_device_eval_batch_size": 8,
	"eval_accumulation_steps": 16,
	"num_train_epochs": 1,
    "max_steps": 100,
	"learning_rate": 3e-05,
	"warmup_steps": 30,
	"logging_steps": 1,
	"evaluation_strategy": "no",
	"save_strategy": "steps",
	"src_length": 256,
	"max_length": 512,
	"bf16": true,
	"fp16_opt_level": "O2",
	"do_train": true,
	"do_eval": false,
	"disable_tqdm": true,
	"load_best_model_at_end": false,
	"eval_with_do_generation": false,
	"recompute": false,
	"save_total_limit": 1,
	"tensor_parallel_degree": 4,
	"pipeline_parallel_degree": 1,
	"pipeline_parallel_config": "disable_p2p_cache_shape",
	"zero_padding": false,
	"use_flash_attention": true,
	"sharding_parallel_degree": 2,
	"sharding": "stage2",
	"recompute_granularity": "full",
	"autotuner_benchmark": 1,
	"benchmark": 1
}